# Scratch arithmetic typed at the console: every result is printed and
# nothing is stored.
3325.49 + 342.67
3204.26 - 3668.16
72 / 272
66 - 17
500 / 60
500 / 20
300 / 15
1.2 / 11
227 / (227 + 186)
236 / (236 + 187)
0.92 + 65 # NOTE(review): possibly a typo for 0.92 + 0.65, which follows
0.92 + 0.65
((0.85 + 0.5) + (0.92 + 0.65) + 0.16) / 3
0.85 + 0.5
library(tidyverse)
library(readxl)
library(magrittr)

# Count the non-missing student entries per Topic / Recitation Section / TA in
# sheet 6 of the attendance workbook and export the counts as a CSV.
#
# The interactive attempts that used to live here either failed to parse
# (`mutate(n = ())`), errored at run time (`select()` without data, an unquoted
# `values_to`, `n_distinct()` with no argument, `count()` inside `mutate()`),
# or were discarded when the sheet was read again. Only the sequence that
# produced the exported file is kept; the `View()` and `?help` calls used
# while exploring are dropped so the section can be sourced non-interactively.
attendance_path <-
  "~/Downloads/Attendance for Public Policy Process - Spring 2024.xlsx"

d <- read_excel(attendance_path, sheet = 6)
names(d)

d %<>%
  # Stack the columns `Students` through `...12` into a single `students`
  # column (one row per original cell).
  pivot_longer(`Students`:`...12`, values_to = "students") %>%
  # Drop the cells that held no value.
  filter(!is.na(students)) %>%
  # One row per Topic / section / TA, with the number of remaining rows in `n`.
  group_by(Topic, `Recitation Section`, TA) %>%
  count() %>%
  # Leave `d` ungrouped so later verbs do not silently operate per group.
  ungroup() %>%
  arrange(Topic, `Recitation Section`, TA)

write_csv(d, file = "~/Library/CloudStorage/Box-Box/PPE3002/projectgroups.csv")
# Scratch arithmetic: results are printed, not stored.
47 / (9997 + 47)
193 / (9997 + 193)
193 / (1672 + 193)
5 * 12 + 9
2000 / 400
# preamble and load data ####
# Removes every object from the global environment before the analysis starts.
# NOTE(review): this also wipes anything the caller had in the session.
rm(list=ls())
# Packages are attached in this order. Later packages mask same-named
# functions from earlier ones, so the order is deliberately left untouched.
library(foreign)
library(survey)
library(xtable)
library(reshape2)
library(plotrix)
library(readstata13)
library(fastDummies)
library(dotwhisker)
library(RColorBrewer)
library(magrittr)
library(estimatr)
library(randomizr)
library(sandwich)
library(tidyverse)
# Every relative path below is resolved against this directory.
setwd("~/Dropbox/facebook sampling/replication/mexico replication")
# Relative directory paths. Judging by the names these are output locations
# for figures and data exports; neither is used in the visible part of the
# script.
paperfigs<-"../figures/"
data_exports<-"../data_exports/"
# Restores the objects saved in this .Rda file into the global environment.
# NOTE(review): presumably this is where `mexico` (and possibly `incompletes`,
# which is used later without being created) comes from -- confirm.
load("mexico data/mexico_facebook_cleaned.Rda")
# Quick look at the column names and the first values of the `cell` column.
names(mexico)
head(mexico$cell)
# cost analysis ####
# Read ad-set export number 4 as plain character/numeric columns, then list
# its column names and open it in the data viewer.
centro <- read.csv(
  file = "mexico data/mexico_adsets/Yale-Survey-Research-Lab-Ad-Sets-Lifetime (4).csv",
  stringsAsFactors = FALSE
)
names(centro)
View(centro)
# time to complete statistics ####
# Convert the duration column to numeric by way of character.
mexico$Duration..in.seconds. <- as.numeric(
  as.character(mexico$Duration..in.seconds.)
)
# Element 3 of the default quantile() output is the 50% quantile (the median).
quantile(mexico$Duration..in.seconds.) ## median response time: 385
quantile(mexico$Duration..in.seconds.)[3] / 60
mean(mexico$Duration..in.seconds.)
# Keep responses that took at least one third of the median duration.
## nobody filled out the survey in less than 1/3 median response time.
mexico %<>%
  filter(
    Duration..in.seconds. >= quantile(mexico$Duration..in.seconds.)[3] / 3
  )
# Rename `cell.x` to `cell` in the incomplete-responses table.
incompletes <- incompletes %>%
  rename(cell = cell.x)
# Table 2: check cell targeting accuracy ####
## The accuracy figures are entered into the table by hand from the
## diag_share() output printed below.

# Share of a cross-tabulation that lies on its main diagonal.
#   tab      : a two-way table (Facebook targeting x self-report).
#   excluded : count removed from the denominator before dividing.
diag_share <- function(tab, excluded = 0) {
  sum(diag(tab)) / (sum(tab) - excluded)
}

# Adds `lowed` to `df`: 1 when StartDate contains "9/9/19" or self-reported
# education is "nosecondary"/"secondary", 0 when it is "postsecondary", and NA
# for every other row (no default branch).
# NOTE(review): "9/9/19" is matched as a substring, so a date such as
# "19/9/19" would also match -- confirm against the StartDate format.
add_lowed <- function(df) {
  df %>%
    mutate(lowed = case_when(
      grepl("9/9/19", StartDate) ~ 1,
      edselfreport %in% c("nosecondary", "secondary") ~ 1,
      edselfreport == "postsecondary" ~ 0
    ))
}

## age: Facebook targeting vs self-report
age_tab <- table(mexico$ageFB, mexico$agegroupselfreport)
age_tab
diag_share(age_tab)

## gender: Facebook targeting vs self-report
mexico$genderselfreport <- as.character(mexico$genderselfreport)
gender_tab <- table(mexico$genderFB, mexico$genderselfreport)
gender_tab
# 23 and 27 are hard-coded counts removed from the denominator.
# NOTE(review): presumably respondents outside the two gender cells -- confirm,
# and recompute them if the data change.
diag_share(gender_tab, excluded = 23 + 27)

## create lowed category
incompletes %<>% add_lowed()
mexico %<>% add_lowed()

## target by date fielded because we only used this targeting in the last part
## of our recruitment
# Everything from the first row whose StartDate contains "9/9/19" to the end.
# NOTE(review): this assumes the rows of `mexico` are in fielding order.
first_lowed_row <- grep("9/9/19", mexico$StartDate)[1]
stopifnot(!is.na(first_lowed_row)) # fail clearly when no row matches the date
mexico.lowed <- mexico[first_lowed_row:nrow(mexico), ]

# edmatch2 is 1 when `education` is "Ninguno", "Primaria" or "Secundaria", or
# contains "Bachillerato"; 0 otherwise.
## (author's note: 1 indicates that self-reported education is highschool
## (bachillerato), technical/ professional school, but not university)
mexico.lowed %<>%
  mutate(edmatch2 = ifelse(
    education %in% c("Ninguno", "Primaria", "Secundaria") |
      grepl("Bachillerato", education),
    1, 0
  ))
prop.table(table(mexico.lowed$edmatch2))

## geography: Facebook targeting vs self-report
geo_tab <- table(mexico$geoFB, mexico$geoselfreport)
geo_tab
diag_share(geo_tab)
# cost analysis ####

# Reads one Facebook ad-set export, identified by the number in parentheses at
# the end of its file name, keeping strings as character.
read_adsets <- function(export_no) {
  read.csv(
    file.path(
      "mexico data/mexico_adsets",
      paste0("Yale-Survey-Research-Lab-Ad-Sets-Lifetime (", export_no, ").csv")
    ),
    stringsAsFactors = FALSE
  )
}

## collapse cells that were combined
# Adds a `cell` column: the lower-cased ad-set name, with the split ad sets
# ("centro 1a"/"1b", "occidente 1a"/"1b", "norte 1a"/"1b", "sur 1a".."1e")
# folded back into their parent cell. Rows with an empty ad-set name are
# labelled "total".
label_cells <- function(ads) {
  ads %>%
    mutate(
      cell = tolower(Ad.Set.Name),
      cell = gsub("(centro|occidente|norte) 1[ab]", "\\1 1", cell),
      cell = gsub("sur 1[a-e]", "sur 1", cell),
      cell = ifelse(cell == "", "total", cell)
    )
}

# Per-cell reach, impressions, results, spend and clicks, plus the derived
# rates and unit costs. The "total" rows are dropped after summarising.
summarise_cells <- function(ads) {
  ads %>%
    group_by(cell) %>%
    summarise(
      reach = sum(Reach, na.rm = TRUE),
      impressions = sum(Impressions, na.rm = TRUE),
      results = sum(Results, na.rm = TRUE),
      spent = sum(Amount.Spent..USD., na.rm = TRUE),
      clicks = sum(Unique.Link.Clicks, na.rm = TRUE)
    ) %>%
    mutate(
      clickthroughrate = clicks / impressions,
      completerate = results / impressions,
      costperclick = spent / clicks,
      costpercomplete = spent / results
    ) %>%
    filter(cell != "total")
}

# Campaign-wide sums over all cells, and the rates / unit costs computed from
# those sums (not averages of the per-cell values).
campaign_totals <- function(cell_stats) {
  totals <- colSums(
    cell_stats[c("impressions", "reach", "clicks", "results", "spent")]
  )
  list(
    totals = totals,
    overall = c(
      clickthroughrate = totals[["clicks"]] / totals[["impressions"]],
      completerate = totals[["results"]] / totals[["impressions"]],
      costperclick = totals[["spent"]] / totals[["clicks"]],
      costpercomplete = totals[["spent"]] / totals[["results"]]
    )
  )
}

# Five-number summaries (plus mean) of the per-cell rates and unit costs.
# The two rates are reported in percent.
cell_distributions <- function(cell_stats) {
  list(
    clickthroughrate_pct = summary(cell_stats$clickthroughrate) * 100,
    completerate_pct = summary(cell_stats$completerate) * 100,
    costperclick = summary(cell_stats$costperclick),
    costpercomplete = summary(cell_stats$costpercomplete)
  )
}

# `centro` and `other` form the initial sample; `centro2` and `other2` form the
# low-education oversample (see the Table S.6 sections below).
centro <- read_adsets(4)
other <- read_adsets(5)
centro2 <- read_adsets(7)
other2 <- read_adsets(6)
unique(other2$Ad.Set.Name)

full <- label_cells(rbind(centro, centro2, other, other2))
sort(unique(full$Ad.Set.Name))
length(unique(full$cell))
full.sum <- summarise_cells(full)

## Table S4: stats on reach, impressions, clicks, results, and spending ####
campaign_totals(full.sum)
## table S.5: click-through, completion, and cost across targeting cells ####
cell_distributions(full.sum)

## Table S.6, stats for initial sample and low-ed oversample ####
## initial sample first:
initial <- label_cells(rbind(centro, other))
initial.sum <- summarise_cells(initial) %>%
  arrange(costpercomplete)
## stats for paper: Table S6
campaign_totals(initial.sum)
## Table S7
cell_distributions(initial.sum)

## low education oversample next ####
over <- label_cells(rbind(centro2, other2))
over.sum <- summarise_cells(over) %>%
  arrange(desc(costpercomplete))
## stats for paper: table S6
campaign_totals(over.sum)
## table S7
cell_distributions(over.sum)
# Exploratory reconciliation of ad spend by the geography code in the cell
# name (" 1 " / "2a" versus "3a" / "4a"). The "rural"/"urban" labels follow
# the variable names used in this script.
unique(mexico$geoselfreport)

# Rows of `full` whose cell name matches each pattern, stacked in pattern
# order. A row matching two patterns would appear twice, as before.
cells_matching <- function(patterns) {
  do.call(rbind, lapply(patterns, function(p) full[grep(p, full$cell), ]))
}

rural <- cells_matching(c(" 1 ", "2a"))
urban <- cells_matching(c("3a", "4a"))

# Completes, spend and cost per complete for each rural cell. `results` is kept
# in the summary so the ratio can be computed; an earlier attempt dropped it
# and then failed on `spent / results`.
rural_cost <- rural %>%
  group_by(cell) %>%
  summarise(
    results = sum(Results, na.rm = TRUE),
    spent = sum(Amount.Spent..USD., na.rm = TRUE)
  ) %>%
  mutate(costpercomplete = spent / results)
View(rural_cost)
sum(rural_cost$spent)

# Respondents whose self-reported geography contains "1" or "2a".
# The earlier `"1" %in% mexico$geoselfreport | "2a" %in% mexico$geoselfreport`
# is a single TRUE/FALSE, so `which()` of it selected at most the first row;
# the test has to be applied row by row.
# NOTE(review): "1" is matched as a substring -- confirm no other category
# contains a 1.
mexico.rural <- mexico %>%
  filter(grepl("1", geoselfreport) | grepl("2a", geoselfreport))
unique(mexico.rural$geoselfreport)
unique(rural$cell)
nrow(mexico.rural)
## ratio of ad spend to these places to complete responses from these places
sum(rural_cost$spent) / nrow(mexico.rural)

urban_spend <- urban %>%
  group_by(cell) %>%
  summarise(spent = sum(Amount.Spent..USD., na.rm = TRUE))
sum(urban_spend$spent)
sum(rural_cost$spent)
# No na.rm here: the result is NA if any spend value is missing.
sum(full$Amount.Spent..USD.)

# Label inventories used to check that the patterns above cover every cell.
unique(mexico$geoFB)
sort(unique(full$Ad.Set.Name))
sort(unique(full$cell))
sort(unique(mexico$geoselfreport))

# Row counts of the two subsets, and the rows of `full` in neither of them.
nrow(rural) + nrow(urban)
View(full %>% filter(!grepl("3a|4a| 1 |2a", cell)))
# Reconcile total ad spend against the gender and geography splits.
# Earlier attempts referenced `f`, which was never defined, and used `m` before
# assigning it; those lines are removed and the checks run in dependency order.

# The "total" rows, kept separately so they can be compared with the splits.
spent <- full %>% filter(cell == "total")
sum(spent$Amount.Spent..USD.)
sum(full$Amount.Spent..USD.)

# Row-level split on whether the cell name contains "female".
# NOTE(review): the complement holds every other row, including the "total"
# rows and any cell without a gender label -- confirm it is men only once the
# totals are removed.
w_rows <- full %>% filter(grepl("female", cell))
m_rows <- full %>% filter(!grepl("female", cell))

# Row-count checks: the gender split partitions `full`.
nrow(rural) + nrow(urban)
nrow(full)
nrow(m_rows) + nrow(w_rows)
sort(w_rows$Ad.Set.Name)
sort(unique(w_rows$cell))
sort(unique(m_rows$cell))
View(m_rows %>% filter(cell != "total"))

## cost by respondent type: Table S8
## this tells us the amount we spent advertising to each group
## divided by the number of people we recruited from each group
## (defined as complete responses)

# Ad spend per cell for the rows of `ads`, leaving out the "total" rows so the
# campaign totals are not counted a second time.
spend_by_cell <- function(ads) {
  ads %>%
    filter(cell != "total") %>%
    group_by(cell) %>%
    summarise(spent = sum(Amount.Spent..USD., na.rm = TRUE))
}

w <- spend_by_cell(w_rows)
View(w)
sum(w$spent)
m <- spend_by_cell(m_rows)
sum(m$spent)

rural <- spend_by_cell(
  rbind(full[grep(" 1 ", full$cell), ], full[grep("2a", full$cell), ])
)
urban <- spend_by_cell(
  rbind(full[grep("3a", full$cell), ], full[grep("4a", full$cell), ])
)
sum(urban$spent)
sum(rural$spent)

# The two splits should add up to the same total spend.
sum(urban$spent, rural$spent)
sum(m$spent, w$spent)

## spend on ads targeting women divided by # of women respondents (self-reported)
sum(w$spent) / sum(mexico$genderselfreport == "F", na.rm = TRUE)
## cost by respondent type: Table S8
## this tells us the amount we spent advertising to each group
## divided by the number of people we recruited from each group
## (defined as complete responses)

# Ad spend per cell for the rows of `ads`, leaving out the "total" rows so the
# campaign totals are not counted a second time.
spend_by_cell <- function(ads) {
  ads %>%
    filter(cell != "total") %>%
    group_by(cell) %>%
    summarise(spent = sum(Amount.Spent..USD., na.rm = TRUE))
}

## spend on ads targeting women divided by # of women respondents (self-reported)
w <- spend_by_cell(full %>% filter(grepl("female", cell)))
sum(w$spent) / sum(mexico$genderselfreport == "F", na.rm = TRUE)

## spend on ads targeting men divided by # of men respondents (self-reported)
# "Men" are all non-total cells whose name does not contain "female".
# NOTE(review): confirm every such cell is a male-targeted ad set.
m <- spend_by_cell(full %>% filter(!grepl("female", cell)))
sum(m$spent) / sum(mexico$genderselfreport == "M", na.rm = TRUE)

# Cells whose name contains " 1 " or "2a".
rural <- spend_by_cell(
  rbind(full[grep(" 1 ", full$cell), ], full[grep("2a", full$cell), ])
)
## subset dataset to self-reports in the rural places
# NOTE(review): "1" is matched as a substring of geoselfreport -- confirm no
# other category contains a 1.
mexico.rural <- mexico %>%
  filter(grepl("1", geoselfreport) | grepl("2a", geoselfreport))
## ratio of ad spend to these places to complete responses from these places
sum(rural$spent) / nrow(mexico.rural) ## 0.19

# Cells whose name contains "3a" or "4a", and the matching self-reports.
urban <- spend_by_cell(
  rbind(full[grep("3a", full$cell), ], full[grep("4a", full$cell), ])
)
mexico.urban <- mexico %>%
  filter(grepl("3a", geoselfreport) | grepl("4a", geoselfreport))
# Number of respondents covered by the two geographic subsets together.
nrow(mexico.rural) + nrow(mexico.urban)
sum(urban$spent) / nrow(mexico.urban)
# Scratch arithmetic: results are printed, not stored.
6128.57 - 3610.21
6128.57 - 3610.21 - 1096
